Preparation

Load data, helper functions

source("analysis_helpers.R")

# Load the tracking data for the application session code "f33ac92fb8"
# (a length-one character vector needs no c() wrapper).
tracking_data <- load_app_sessions_tracking_data("f33ac92fb8")

Tracking session duration

# Derive the per-session times from the raw tracking data and plot the
# session durations.
track_sess_times <- tracking_data |> tracking_sess_times()

track_sess_times |> plot_tracking_sess_durations()

Limit tracking session events to exercise time frame

# Time frame of interest: the exercise padded by 30 minutes on either side.
starttime <- ymd_hm("2023-11-15 13:15", tz = "CET")    # exercise start - 30 min.
endtime <- ymd_hm("2023-11-15 17:30", tz = "CET")      # exercise end + 30 min.

# Keep only the events inside the time frame, then clamp each session's
# recorded start/end to the events that remain. The start may only move
# later (max) and the end may only move earlier (min); taking min() for the
# start would leave sessions that began before the window unrestricted.
tracking_data <- filter(tracking_data, event_time >= starttime, event_time <= endtime) |>
    group_by(track_sess_id) |>
    mutate(track_sess_start = max(track_sess_start, min(event_time)),
           track_sess_end = min(track_sess_end, max(event_time))) |>
    ungroup()

# Recompute the session times from the current tracking data and show the
# durations overall, per user code, and as a histogram (bin width 0.25).
track_sess_times <- tracking_data |> tracking_sess_times()

track_sess_times |> plot_tracking_sess_durations()

track_sess_times |> plot_tracking_sess_durations(by_user_code = TRUE)

track_sess_times |> plot_tracking_sess_durations_hist(binwidth = 0.25)

  • long interaction time
# Median session duration, converted to minutes.
as.double(median(track_sess_times$duration), units = "mins")
## [1] 41.37412

Number of observations, unique users and event types

  • many observations
# Number of rows (observations) and columns of the tracking data.
tracking_data |> dim()
## [1] 265860     40
  • number of unique users:
# Count the distinct user application session codes.
tracking_data$user_app_sess_code |>
    unique() |>
    length()
## [1] 15
  • each user created only one tracking session:
# Count the distinct tracking session IDs.
tracking_data$track_sess_id |>
    unique() |>
    length()
## [1] 15
  • one user used a tablet, all others a laptop or desktop PC:
# One row per (tracking session, form factor) combination, then the number
# of such rows per form factor.
tracking_data |>
    distinct(track_sess_id, form_factor) |>
    count(form_factor)
## # A tibble: 2 × 2
##   form_factor     n
##   <fct>       <int>
## 1 desktop        14
## 2 tablet          1
  • number of events per event type:
# Count the events per event type once; the returned object carries both the
# plot and the underlying table, so there is no need to call the helper again.
res <- plot_event_type_counts(tracking_data)
res$plot

res$table
## # A tibble: 10 × 2
##    type                 n
##    <fct>            <int>
##  1 chapter             56
##  2 click             1734
##  3 contentscroll    89059
##  4 ex_result           48
##  5 ex_submit           48
##  6 input              946
##  7 input_change        65
##  8 mouse           173163
##  9 question_submit    173
## 10 scroll             568
  • number of events per event type per user:
# Event counts per event type and user; show the plot here, the table is
# used further below.
res <- tracking_data |> plot_event_type_counts_per_user()
res$plot

  • some students apparently did not submit any code exercises, but all of them submitted answers to questions
# Restrict the per-user counts to the three submission-related event types
# and spread them into one column per type (0 where a user has no such event).
submit_event_counts <- res$table |>
    filter(type %in% c("ex_result", "ex_submit", "question_submit")) |>
    pivot_wider(names_from = type, values_from = n, values_fill = 0)
submit_event_counts
## # A tibble: 15 × 4
##    user_code        question_submit ex_result ex_submit
##    <fct>                      <int>     <int>     <int>
##  1 0383a657ecfa9596              14         0         0
##  2 1194a5f2318f9118               8         0         0
##  3 122a85ef1e6f8f54              11         0         0
##  4 46b1f0eca5d6c0c2              23         9         9
##  5 521ec99409c16a5c              12         0         0
##  6 52a9b29ca5d8527c               9         0         0
##  7 5ade00fb32f716df              12        10        10
##  8 646e65ff535d24e1              17         0         0
##  9 a4be190adbd9dc41              10         0         0
## 10 c439271a1c7cbb67               9        10        10
## 11 cf98f5634c340955              11         0         0
## 12 eaab3f0249108fff              11         5         5
## 13 ef0e35f9ae0e35a4               9         0         0
## 14 f077ce68e979ee1f               8        12        12
## 15 f78ba1efd64104b4               9         2         2
# Combine both exercise-related counts into a single column and list the
# users that have no question submissions or no exercise events at all.
submit_event_counts |>
    mutate(ex = ex_result + ex_submit) |>
    select(-c(ex_result, ex_submit)) |>
    filter(question_submit == 0 | ex == 0)
## # A tibble: 9 × 3
##   user_code        question_submit    ex
##   <fct>                      <int> <int>
## 1 0383a657ecfa9596              14     0
## 2 1194a5f2318f9118               8     0
## 3 122a85ef1e6f8f54              11     0
## 4 521ec99409c16a5c              12     0
## 5 52a9b29ca5d8527c               9     0
## 6 646e65ff535d24e1              17     0
## 7 a4be190adbd9dc41              10     0
## 8 cf98f5634c340955              11     0
## 9 ef0e35f9ae0e35a4               9     0

Question and code exercise submissions

  • proportion of correct answers per question:
# Extract the question submissions and compute the proportion of correct
# answers per question; show the plot first, then the table.
quest_data <- tracking_data |> question_submit_data()
res <- quest_data |> plot_question_prop_correct()
res$plot

res$table
## # A tibble: 7 × 4
##   ex_label           n prop_correct sd_correct
##   <fct>          <int>        <dbl>      <dbl>
## 1 calc_sens         28        0.536     0.0942
## 2 calc_spec         29        0.517     0.0928
## 3 clinicalstudy1    17        0.882     0.0781
## 4 clinicalstudy2    23        0.652     0.0993
## 5 confmat           44        0.341     0.0715
## 6 P_T_K             18        0.833     0.0878
## 7 ppv_sens          14        0.357     0.128
  • number of tries per question:
# Tries per question derived from the question submissions; plot them.
quiz_tries <- quest_data |> question_or_exercise_submit_tries()
plot_question_n_tries(quiz_tries)$plot

  • interestingly, each question was finally answered correctly – no one gave up in between:
# TRUE only if every entry of quiz_tries is flagged as correct.
quiz_tries$ex_correct |> all()
## [1] TRUE
# Proportion of correct quiz answers by try number, with its plot.
quiz_prop_correct_per_try <- quest_data |> prop_correct_in_ith_try()
quiz_prop_correct_per_try |>
    plot_prop_correct_per_try("Proportion of correct quiz answers per number of tries")

  • only a few coding exercises were tried, and even fewer succeeded
# Extract the code exercise results and the tries derived from them, then
# summarise per exercise label: number of rows and share flagged correct.
ex_data <- tracking_data |> exercise_result_data()
ex_tries <- ex_data |> question_or_exercise_submit_tries()
ex_tries |>
    group_by(ex_label) |>
    summarise(
        n = n(),
        prop_correct = mean(ex_correct)
    )
## # A tibble: 2 × 3
##   ex_label           n prop_correct
##   <fct>          <int>        <dbl>
## 1 neg_pred_value     6        0.167
## 2 nvw_a              1        1
# Plot the number of tries for the code exercises.
ex_tries |> plot_exercise_n_tries()

  • all in all, less than a third of the exercises were answered correctly in the end, which means most students gave up
# Overall share of entries in ex_tries that are flagged as correct.
ex_tries$ex_correct |> mean()
## [1] 0.2857143
# Proportion of correct code submissions by try number, with its plot.
ex_prop_correct_per_try <- ex_data |> prop_correct_in_ith_try()
ex_prop_correct_per_try |>
    plot_prop_correct_per_try("Proportion of correct code submissions per number of tries")

  • (on try 6, someone in tracking session #185 made a correct submission and then regressed to a wrong submission afterwards)

  • often, the submitted code is not correct R code

  • furthermore, students don’t understand that the last output is the result but assume that they have to use print(...) to return the result

# Print the submitted code of every incorrect exercise result, with
# surrounding whitespace trimmed and a "---" line between submissions.
ex_data |>
    filter(!ex_correct) |>
    pull(value) |>
    trimws() |>
    paste(collapse = "\n---\n") |>
    cat()
## prevalence = 0,02
## sens = 0,914
## spec = 0,994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.02
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.02
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(1-positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(1-positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## ---
## prevalence = 0.002
## sens = 0.914
## spec = 0.994
## 
## positiveIll = (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## print(positiveIll)
## (sens * prevalence)/(sens*prevalence + (1 - spec)*(1 - prevalence))
## ---
## print(prevalence)
## ---
## # P(K-|T-)
## 
## t <- 1
## (spec   ) / sens
## ---
## # P(K-|T-)
## 
## t <- 1
## (spec   ) / sens
## ---
## # P(K-|T-)
## 
## (spec * (1 - prevalence)) / (1-(0.914 * 0.002 + (1−0.994) * (1−0.002)))
## ---
## # P(K-|T-)
## 
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## # P(K-|T-)
## 1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002))
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## # P(K-|T-)
## 0.994 * 0.998 / 0.992
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## 0.994 * 0.998
## ---
## # P(K-|T-)
## (0.994 * 0.998) 
## (spec * (1 - prevalence)) / (1 - (0.914 * 0.002 + (1 - 0.994) * (1 - 0.002)))
## ---
## nvw_param <- expand.grid(praev = c(0,1, 0,2, 0,3, 0,4, 0,5, 0,6, 0,7, 0,8, 0,9),
##                          sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = c(0,1, 0,2, 0,3, 0,4, 0,5, 0,6, 0,7, 0,8, 0,9),
##                          sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0,1, 0,9, length.out = 9),
##                          sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0,1, 0,9, length.out = 9),
##                          sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0,1, 0.9, length.out = 9),
##                          sens = c(0,8, 0,91, 0,99))
## nvw_param
## ---
## nvw_param <- expand.grid(praev = seq(0.1, 0.9, length.out = 9),
##                          sens = c(0.8, 0.91, 0.99))
## nvw_param
## ---
## print(prevalence)
## ---
## print(sens)
## ---
## *sens
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevelance))
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevelence))
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevalence))
## ---
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevalence))
## ---
## prevalence = 0.998
## (sens*prevalence)/(sens*prevalence+(1-spec)*(1-prevalence))
## ---
## sens
## ---
## sens
## spec
## ---
## sens
## spec
## prevalence
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nKnT
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## prevalence / (prevalence*nk)
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## prevalence / ((prevalence*nk)+(ntk*spec) )
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## prevalence / ((prevalence*1-spec)+(1-sens*spec) )
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## prevalence / (((1-prevalence)*(1-spec))+(1-sens*spec) )
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## prevalence / (((1-prevalence)*(1-spec))+(1-sens*spec) )
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## (1-prevalence)*(1-spec) / ((1-prevalence)*(1-spec))+(1-sens*spec)
## ---
## sens
## spec
## prevalence
## 
## ntk <- 1 - sens
## ntk
## 
## nk <- 1 - spec
## nk
## 
## (1-prevalence)*(1-spec) / ((1-prevalence)*(1-spec))+(1-sens*spec)
## ---
## (0.994*0.998)/((0.994*0.998)+(1-0.914)*0.002)

Mouse tracks

# Collect the mouse track features of every tracking session and print a plot
# of each session's mouse tracks along the way.
tracks_features <- list()

for (track_sess_id in sort(unique(tracking_data$track_sess_id))) {
    # The helper returns the session's form factor together with its tracks.
    mouse_tracks_data <- mouse_tracks_for_tracking_sess(tracking_data, track_sess_id)
    form_factor <- mouse_tracks_data$form_factor
    mouse_tracks_data <- mouse_tracks_data$tracks
    
    # Tag the features with the session they belong to.
    tracksess_tracks_features <- mouse_tracks_features(mouse_tracks_data)
    tracksess_tracks_features$track_sess_id <- track_sess_id
    tracksess_tracks_features$form_factor <- form_factor
    # Store under the ID's character form: using a numeric ID directly as a
    # list index would pad the list with NULL entries up to that position
    # (and break for IDs that are not positive whole numbers).
    tracks_features[[as.character(track_sess_id)]] <- tracksess_tracks_features
    
    #print(c(track_sess_id, min(mouse_tracks_data$mouse_y), max(mouse_tracks_data$mouse_y)))
    
    # Explicit print() is required for plots created inside a loop.
    print(plot_mouse_tracks_for_tracking_session(mouse_tracks_data, track_sess_id, form_factor))
}

# Stack the per-session features into one table ordered by session and step.
tracks_features_per_track_sess <- bind_rows(tracks_features) |>
    arrange(track_sess_id, t_step)

# Build the full (session x time step) grid so that every session has a row
# for every step up to the largest observed one. Steps without features get
# mean_t_step_V = 0. The form factor is filled in *within* each session (in
# both directions), so a session whose first steps are missing neither stays
# NA nor inherits the form factor of the preceding session.
tracks_features_per_track_sess_filled <-
    expand.grid(track_sess_id = sort(unique(tracks_features_per_track_sess$track_sess_id)),
                t_step = 1:max(tracks_features_per_track_sess$t_step)) |>
    left_join(tracks_features_per_track_sess, by = c("track_sess_id", "t_step")) |>
    arrange(track_sess_id, t_step) |>
    mutate(mean_t_step_V = ifelse(is.na(mean_t_step_V), 0, mean_t_step_V)) |>
    group_by(track_sess_id) |>
    fill(form_factor, .direction = "downup") |>
    ungroup() |>
    as.data.frame()    # keep the plain data.frame class the ungrouped pipeline produced
plot_mouse_velocity_heatmap(tracks_features_per_track_sess_filled)

# Same heatmap, restricted to time steps 1 through 10.
tracks_features_per_track_sess_filled |>
    filter(t_step <= 10) |>
    plot_mouse_velocity_heatmap()